Hope these queries can also help you in your day to day quest to make Postgres run better!
The first three queries rank top SQL from pg_stat_statements either by execution count, mean_exec_time and total_exec_time. I love to use these queries to get a quick glance of what I should focus on tuning. Given that pg_stat_statements tracks lots of things, you can filter out certain “query text” as necessary so that those do not influence the ranking.
WITH hist AS ( SELECT queryid::text, SUBSTRING(query from 1 for 1000) query, ROW_NUMBER () OVER (ORDER BY mean_exec_time::numeric DESC) rn, SUM(mean_exec_time::numeric) mean_exec_time FROM pg_stat_statements WHERE queryid IS NOT NULL AND query::text not like '%pg\_%' AND query::text not like '%g\_%' /* Add more filters here */ GROUP BY queryid, SUBSTRING(query from 1 for 1000), mean_exec_time::numeric ), total AS ( SELECT SUM(mean_exec_time::numeric) mean_exec_time FROM hist ) SELECT DISTINCT h.queryid::text, ROUND(h.mean_exec_time::numeric,3) mean_exec_time, ROUND(100 * h.mean_exec_time / t.mean_exec_time, 1) percent, h.query FROM hist h, total t WHERE h.mean_exec_time >= t.mean_exec_time / 1000 AND rn <= 14 UNION ALL SELECT 'Others', ROUND(COALESCE(SUM(h.mean_exec_time), 0), 3) mean_exec_time, COALESCE(ROUND(100 * SUM(h.mean_exec_time) / AVG(t.mean_exec_time), 1), 0) percent, NULL sql_text FROM hist h, total t WHERE h.mean_exec_time < t.mean_exec_time / 1000 OR rn > 14 ORDER BY 3 DESC NULLS LAST;
WITH hist AS ( SELECT queryid::text, SUBSTRING(query from 1 for 100) query, ROW_NUMBER () OVER (ORDER BY total_exec_time::numeric DESC) rn, SUM(total_exec_time::numeric) total_exec_time FROM pg_stat_statements WHERE queryid IS NOT NULL AND query::text not like '%pg\_%' AND query::text not like '%g\_%' /* Add more filters here */ GROUP BY queryid, SUBSTRING(query from 1 for 100), total_exec_time::numeric ), total AS ( SELECT SUM(total_exec_time::numeric) total_exec_time FROM hist ) SELECT DISTINCT h.queryid::text, ROUND(h.total_exec_time::numeric,3) total_exec_time, ROUND(100 * h.total_exec_time / t.total_exec_time, 1) percent, h.query FROM hist h, total t WHERE h.total_exec_time >= t.total_exec_time / 1000 AND rn <= 14 UNION ALL SELECT 'Others', ROUND(COALESCE(SUM(h.total_exec_time::numeric), 0), 3) total_exec_time, COALESCE(ROUND(100 * SUM(h.total_exec_time) / AVG(t.total_exec_time), 1), 0) percent, NULL sql_text FROM hist h, total t WHERE h.total_exec_time < t.total_exec_time / 1000 OR rn > 14 ORDER BY 3 DESC NULLS LAST;
WITH hist AS ( SELECT queryid::text, SUBSTRING(query from 1 for 100) query, ROW_NUMBER () OVER (ORDER BY calls DESC) rn, calls FROM pg_stat_statements WHERE queryid IS NOT NULL AND query::text not like '%pg\_%' AND query::text not like '%g\_%' /* Add more filters here */ GROUP BY queryid, SUBSTRING(query from 1 for 100), calls ), total AS ( SELECT SUM(calls) calls FROM hist ) SELECT DISTINCT h.queryid::text, h.calls, ROUND(100 * h.calls / t.calls, 1) percent, h.query FROM hist h, total t WHERE h.calls >= t.calls / 1000 AND rn <= 14 UNION ALL SELECT 'Others', COALESCE(SUM(h.calls), 0) calls, COALESCE(ROUND(100 * SUM(h.calls) / AVG(t.calls), 1), 0) percent, NULL sql_text FROM hist h, total t WHERE h.calls < t.calls / 1000 OR rn > 14 ORDER BY 2 DESC NULLS LAST;
Shows total size for a table including a break down for its index and toast size.
SELECT *, pg_size_pretty(table_bytes) AS table, pg_size_pretty(toast_bytes) AS toast, pg_size_pretty(index_bytes) AS index, pg_size_pretty(total_bytes) AS total FROM ( SELECT *, total_bytes - index_bytes - COALESCE(toast_bytes, 0) AS table_bytes FROM ( SELECT c.oid, nspname AS table_schema, relname AS table_name, c.reltuples AS row_estimate, pg_total_relation_size(c.oid) AS total_bytes, pg_indexes_size(c.oid) AS index_bytes, pg_total_relation_size(reltoastrelid) AS toast_bytes FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace WHERE relkind = 'r' ) a ) a WHERE table_schema like '%' AND table_name like '%' AND total_bytes > 0 ORDER BY total_bytes DESC;
Using pg_stat_statements, this query will allocate timing totals as CPU time.
SELECT pss.userid, pss.dbid, pd.datname AS db_name, pss.queryid, round((pss.total_exec_time + pss.total_plan_time)::numeric, 2) AS total_time, pss.calls, round((pss.mean_exec_time + pss.mean_plan_time)::numeric, 2) AS mean, round((100 * (pss.total_exec_time + pss.total_plan_time) / sum((pss.total_exec_time + pss.total_plan_time)::numeric) OVER ())::numeric, 2) AS cpu_portion_pctg, substr(pss.query, 1, 200) short_query FROM pg_stat_statements pss, pg_database pd WHERE pd.oid = pss.dbid AND query::text NOT LIKE '%FOR UPDATE%' /* Add more filters here */ ORDER BY (pss.total_exec_time + pss.total_plan_time) DESC LIMIT 30;
This script looks at the database and table options set for vacuum and analyze to give a report of when vacuum / analyze is projected to run and the last time it did run. This script will give you a good idea how well vacuum and analyze is running:
WITH tbl_reloptions AS ( SELECT oid, oid::regclass table_name, substr(unnest(reloptions), 1, strpos(unnest(reloptions), '=') -1) option, substr(unnest(reloptions), 1 + strpos(unnest(reloptions), '=')) value FROM pg_class c WHERE reloptions is NOT null) SELECT s.schemaname ||'.'|| s.relname as relname, n_live_tup live_tup, n_dead_tup dead_dup, n_tup_hot_upd hot_upd, n_mod_since_analyze mod_since_stats, n_ins_since_vacuum ins_since_vac, case when avacinsscalefactor.value is not null and avacinsthresh.value is not null then ROUND(((n_live_tup * avacinsscalefactor.value::numeric) + avacinsthresh.value::numeric),0) when avacinsscalefactor.value is null and avacinsthresh.value is not null then ROUND(((n_live_tup * (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_insert_scale_factor')) + avacinsthresh.value::numeric),0) when avacinsscalefactor.value is not null and avacinsthresh.value is null then ROUND(((n_live_tup * avacinsscalefactor.value::numeric) + (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_insert_threshold')),0) else ROUND(((n_live_tup * (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_insert_scale_factor')) + (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_insert_threshold')),0) end as ins_for_vac, case when avacscalefactor.value is not null and avacthresh.value is not null then ROUND(((n_live_tup * avacscalefactor.value::numeric) + avacthresh.value::numeric),0) when avacscalefactor.value is null and avacthresh.value is not null then ROUND(((n_live_tup * (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_scale_factor')) + avacthresh.value::numeric),0) when avacscalefactor.value is not null and avacthresh.value is null then ROUND(((n_live_tup * avacscalefactor.value::numeric) + (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_threshold')),0) else ROUND(((n_live_tup * (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_scale_factor')) + (select setting::numeric from pg_settings where name = 'autovacuum_vacuum_threshold')),0) end as mods_for_vac, case when avacanalyzescalefactor.value is not null and avacanalyzethresh.value is not null then ROUND(((n_live_tup * avacanalyzescalefactor.value::numeric) + avacanalyzethresh.value::numeric),0) when avacanalyzescalefactor.value is null and avacanalyzethresh.value is not null then ROUND(((n_live_tup * (select setting::numeric from pg_settings where name = 'autovacuum_analyze_scale_factor')) + avacanalyzethresh.value::numeric),0) when avacanalyzescalefactor.value is not null and avacanalyzethresh.value is null then ROUND(((n_live_tup * avacanalyzescalefactor.value::numeric) + (select setting::numeric from pg_settings where name = 'autovacuum_analyze_threshold')),0) else ROUND(((n_live_tup * (select setting::numeric from pg_settings where name = 'autovacuum_analyze_scale_factor')) + (select setting::numeric from pg_settings where name = 'autovacuum_analyze_threshold')),0) end as mods_for_stats, case when avacfreezeage is not null then ROUND((greatest(age(c.relfrozenxid),age(t.relfrozenxid))::numeric / avacfreezeage.value::numeric * 100),2) else ROUND((greatest(age(c.relfrozenxid),age(t.relfrozenxid))::numeric / (select setting::numeric from pg_settings where name = 'autovacuum_freeze_max_age') * 100),2) end as avac_pct_frz, greatest(age(c.relfrozenxid),age(t.relfrozenxid)) max_txid_age, to_char(last_vacuum, 'YYYY-MM-DD HH24:MI') last_vac, to_char(last_analyze, 'YYYY-MM-DD HH24:MI') last_stats, to_char(last_autovacuum, 'YYYY-MM-DD HH24:MI') last_avac, to_char(last_autoanalyze, 'YYYY-MM-DD HH24:MI') last_astats, vacuum_count vac_cnt, analyze_count stats_cnt, autovacuum_count avac_cnt, autoanalyze_count astats_cnt, c.reloptions, case when avacenabled.value is not null then avacenabled.value::text when (select setting::text from pg_settings where name = 'autovacuum') = 'on' then 'true' else 'false' end as autovac_enabled FROM pg_stat_all_tables s JOIN pg_class c ON (s.relid = c.oid) LEFT JOIN pg_class t ON c.reltoastrelid = t.oid LEFT JOIN tbl_reloptions avacinsscalefactor on (s.relid = avacinsscalefactor.oid and avacinsscalefactor.option = 'autovacuum_vacuum_insert_scale_factor') LEFT JOIN tbl_reloptions avacinsthresh on (s.relid = avacinsthresh.oid and avacinsthresh.option = 'autovacuum_vacuum_insert_threshold') LEFT JOIN tbl_reloptions avacscalefactor on (s.relid = avacscalefactor.oid and avacscalefactor.option = 'autovacuum_vacuum_scale_factor') LEFT JOIN tbl_reloptions avacthresh on (s.relid = avacthresh.oid and avacthresh.option = 'autovacuum_vacuum_threshold') LEFT JOIN tbl_reloptions avacanalyzescalefactor on (s.relid = avacanalyzescalefactor.oid and avacanalyzescalefactor.option = 'autovacuum_analyze_scale_factor') LEFT JOIN tbl_reloptions avacanalyzethresh on (s.relid = avacanalyzethresh.oid and avacanalyzethresh.option = 'autovacuum_analyze_threshold') LEFT JOIN tbl_reloptions avacfreezeage on (s.relid = avacfreezeage.oid and avacfreezeage.option = 'autovacuum_freeze_max_age') LEFT JOIN tbl_reloptions avacenabled on (s.relid = avacenabled.oid and avacenabled.option = 'autovacuum_enabled') WHERE s.relname IN ( SELECT t.table_name FROM information_schema.tables t JOIN pg_catalog.pg_class c ON (t.table_name = c.relname) LEFT JOIN pg_catalog.pg_user u ON (c.relowner = u.usesysid) WHERE t.table_schema like '%' AND (u.usename like '%' OR u.usename is null) AND t.table_name like '%' AND t.table_schema not in ('information_schema','pg_catalog') AND t.table_type not in ('VIEW') AND t.table_catalog = current_database()) AND n_dead_tup >= 0 AND n_live_tup > 0 ORDER BY 3;
To keep a well run system, it’s important to maintain as few indexes as possible. This will show which indexes have not been recently used. The original version of this was obtained from https://www.databasesoup.com/2014/05/new-finding-unused-indexes-query.html (Josh Berkus)
WITH table_scans AS ( SELECT relid, tables.idx_scan + tables.seq_scan AS all_scans, (tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del) AS writes, pg_relation_size(relid) AS table_size FROM pg_stat_all_tables AS tables WHERE schemaname NOT IN ('pg_toast', 'pg_catalog', 'partman') ), all_writes AS ( SELECT sum(writes) AS total_writes FROM table_scans ), indexes AS ( SELECT idx_stat.relid, idx_stat.indexrelid, idx_stat.schemaname, idx_stat.relname AS tablename, idx_stat.indexrelname AS indexname, idx_stat.idx_scan, pg_relation_size(idx_stat.indexrelid) AS index_bytes, indexdef ~* 'USING btree' AS idx_is_btree FROM pg_stat_user_indexes AS idx_stat JOIN pg_index USING (indexrelid) JOIN pg_indexes AS indexes ON idx_stat.schemaname = indexes.schemaname AND idx_stat.relname = indexes.tablename AND idx_stat.indexrelname = indexes.indexname WHERE pg_index.indisunique = FALSE ), index_ratios AS ( SELECT schemaname, tablename, indexname, idx_scan, all_scans, round(( CASE WHEN all_scans = 0 THEN 0.0::numeric ELSE idx_scan::numeric / all_scans * 100 END), 2) AS index_scan_pct, writes, round(( CASE WHEN writes = 0 THEN idx_scan::numeric ELSE idx_scan::numeric / writes END), 2) AS scans_per_write, pg_size_pretty(index_bytes) AS index_size, pg_size_pretty(table_size) AS table_size, idx_is_btree, index_bytes FROM indexes JOIN table_scans USING (relid) ), index_groups AS ( SELECT 'Never Used Indexes' AS reason, *, 1 AS grp FROM index_ratios WHERE idx_scan = 0 AND idx_is_btree UNION ALL SELECT 'Low Scans, High Writes' AS reason, *, 2 AS grp FROM index_ratios WHERE scans_per_write <= 1 AND index_scan_pct < 10 AND idx_scan > 0 AND writes > 100 AND idx_is_btree UNION ALL SELECT 'Seldom Used Large Indexes' AS reason, *, 3 AS grp FROM index_ratios WHERE index_scan_pct < 5 AND scans_per_write > 1 AND idx_scan > 0 AND idx_is_btree AND index_bytes > 100000000 UNION ALL SELECT 'High-Write Large Non-Btree' AS reason, index_ratios.*, 4 AS grp FROM index_ratios, all_writes WHERE (writes::numeric / (total_writes + 1)) > 0.02 AND NOT idx_is_btree AND index_bytes > 100000000 ORDER BY grp, index_bytes DESC ) SELECT reason, schemaname, tablename, indexname, index_scan_pct, scans_per_write, index_size, table_size FROM index_groups WHERE tablename LIKE '%';
This is a great query to rank from most frequent to less frequent wait events being observed on they system in pg_stat_activity. This does not provide historical reference, but a look at the current moment in time:
WITH waits AS ( SELECT wait_event, rank() OVER (ORDER BY count(wait_event) DESC) rn FROM pg_stat_activity WHERE wait_event IS NOT NULL GROUP BY wait_event ORDER BY count(wait_event) ASC ), total AS ( SELECT SUM(rn) total_waits FROM waits ) SELECT DISTINCT h.wait_event, h.rn, ROUND(100 * h.rn / t.total_waits, 1) percent FROM waits h, total t WHERE h.rn >= t.total_waits / 1000 AND rn <= 14 UNION ALL SELECT 'Others', COALESCE(SUM(h.rn), 0) rn, COALESCE(ROUND(100 * SUM(h.rn) / AVG(t.total_waits), 1), 0) percent FROM waits h, total t WHERE h.rn < t.total_waits / 1000 OR rn > 14 ORDER BY 2 DESC NULLS LAST;
Often, queries will experience poor run times when foreign keys do not have indexes supporting them. This is a good query to show those missing indexes:
WITH y AS ( SELECT pg_catalog.format('%I.%I', n1.nspname, c1.relname) AS referencing_tbl, pg_catalog.quote_ident(a1.attname) AS referencing_column, t.conname AS existing_fk_on_referencing_tbl, pg_catalog.format('%I.%I', n2.nspname, c2.relname) AS referenced_tbl, pg_catalog.quote_ident(a2.attname) AS referenced_column, pg_relation_size(pg_catalog.format('%I.%I', n1.nspname, c1.relname)) AS referencing_tbl_bytes, pg_relation_size(pg_catalog.format('%I.%I', n2.nspname, c2.relname)) AS referenced_tbl_bytes, pg_catalog.format($$CREATE INDEX ON %I.%I(%I);$$, n1.nspname, c1.relname, a1.attname) AS suggestion FROM pg_catalog.pg_constraint t JOIN pg_catalog.pg_attribute a1 ON a1.attrelid = t.conrelid AND a1.attnum = t.conkey[1] JOIN pg_catalog.pg_class c1 ON c1.oid = t.conrelid JOIN pg_catalog.pg_namespace n1 ON n1.oid = c1.relnamespace JOIN pg_catalog.pg_class c2 ON c2.oid = t.confrelid JOIN pg_catalog.pg_namespace n2 ON n2.oid = c2.relnamespace JOIN pg_catalog.pg_attribute a2 ON a2.attrelid = t.confrelid AND a2.attnum = t.confkey[1] WHERE t.contype = 'f' AND NOT EXISTS ( SELECT 1 FROM pg_catalog.pg_index i WHERE i.indrelid = t.conrelid AND i.indkey[0] = t.conkey[1])) SELECT referencing_tbl, referencing_column, existing_fk_on_referencing_tbl, referenced_tbl, referenced_column, pg_size_pretty(referencing_tbl_bytes) AS referencing_tbl_size, pg_size_pretty(referenced_tbl_bytes) AS referenced_tbl_size, suggestion FROM y ORDER BY referencing_tbl_bytes DESC, referenced_tbl_bytes DESC, referencing_tbl, referenced_tbl, referencing_column, referenced_column;
postgres.ai is a great place to get some observability queries and this is one of my favorites:
with
recursive activity
as
(
select
pg_blocking_pids(pid) blocked_by,
*,
age(clock_timestamp(), xact_start)::interval(0)
as
tx_age,
-- "pg_locks.waitstart" – PG14+ only; for older versions: age(clock_timestamp(), state_change) as wait_age,
age(clock_timestamp(), (
select
max
(l.waitstart)
from
pg_locks l
where
a.pid = l.pid))::interval(0)
as
wait_age
from
pg_stat_activity a
where
state
is
distinct
from
'idle'
), blockers
as
(
select
array_agg(
distinct
c
order
by
c)
as
pids
from
(
select
unnest(blocked_by)
from
activity
)
as
dt(c)
), tree
as
(
select
activity.*,
1
as
level
,
activity.pid
as
top_blocker_pid,
array[activity.pid]
as
path,
array[activity.pid]::
int
[]
as
all_blockers_above
from
activity, blockers
where
array[pid] <@ blockers.pids
and
blocked_by =
'{}'
::
int
[]
union
all
select
activity.*,
tree.
level
+ 1
as
level
,
tree.top_blocker_pid,
path || array[activity.pid]
as
path,
tree.all_blockers_above || array_agg(activity.pid) over ()
as
all_blockers_above
from
activity, tree
where
not
array[activity.pid] <@ tree.all_blockers_above
and
activity.blocked_by <>
'{}'
::
int
[]
and
activity.blocked_by <@ tree.all_blockers_above
)
select
pid,
blocked_by,
case
when
wait_event_type <>
'Lock'
then
replace
(state,
'idle in transaction'
,
'idletx'
)
else
'waiting'
end
as
state,
wait_event_type ||
':'
|| wait_event
as
wait,
wait_age,
tx_age,
to_char(age(backend_xid),
'FM999,999,999,990'
)
as
xid_age,
to_char(2147483647 - age(backend_xmin),
'FM999,999,999,990'
)
as
xmin_ttf,
datname,
usename,
(
select
count
(
distinct
t1.pid)
from
tree t1
where
array[tree.pid] <@ t1.path
and
t1.pid <> tree.pid)
as
blkd,
format(
'%s %s%s'
,
lpad(
'['
|| pid::text ||
']'
, 9,
' '
),
repeat(
'.'
,
level
- 1) ||
case
when
level
> 1
then
' '
end
,
left
(query, 1000)
)
as
query
from
tree
order
by
top_blocker_pid,
level
, pid;